library(tidyverse)

# Load the Nikkei 225 daily data (2019/1/4 - 2021/1/22) and parse the dates.
dt <- read.csv("nikkei225.csv")
dt$Date <- as.Date(dt$Date, format = "%Y-%m-%d")
# Put the observations in chronological order right away: the AR(1) fit and
# the unit-root test on the closing price further down treat the rows as a
# time series, so they must not depend on how the CSV happens to be ordered.
dt <- dt[order(dt$Date), , drop = FALSE]
rownames(dt) <- NULL
head(dt)

#show the trend of the time series
library(ggplot2)
library(grid)
# Closing-price chart with a shaded band, dashed event lines and event labels.
# The fixed decorations are added with annotate() instead of
# geom_rect()/geom_text() with constants inside aes(): constants in aes() are
# recycled for every row of `dt`, so the band was drawn once per observation
# (its alpha stacks up to an almost opaque fill) and every label was printed
# on top of itself once per observation.
ggplot(dt, aes(x = Date, y = Close)) +
  # shaded band from 2020-04-07 to 2020-05-26, drawn exactly once
  annotate(
    "rect",
    xmin = as.Date("2020-04-07"), xmax = as.Date("2020-05-26"),
    ymin = -Inf, ymax = Inf,
    fill = "grey", alpha = 0.3
  ) +
  geom_line(color = "royalblue") +
  labs(x = "Date(Y/M)", y = "closed price") +
  # dashed vertical markers for the three labelled events
  geom_vline(
    xintercept = as.Date(c("2019-11-17", "2020-1-3", "2020-11-3")),
    size = 0.4, linetype = "dashed"
  ) +
  annotate(
    "text",
    x = as.Date("2019-11-10"), y = 28000,
    label = "First case of Covid−19 in China \u2192",
    hjust = 1, vjust = 1, size = 2.5
  ) +
  annotate(
    "text",
    x = as.Date("2020-1-1"), y = 27000,
    label = "First case of Covid−19 in Japan \u2192",
    hjust = 1, vjust = 1, size = 2.5
  ) +
  # The label below is a multi-line string literal: its line breaks and
  # leading blanks are part of the text, so do not re-indent it.
  annotate(
    "text",
    x = as.Date("2020-4-7"), y = 29000,
    label = 'First declaration of 
  a state of emergency 
                in Japan',
    hjust = 0.3, vjust = 1, size = 2.5
  ) +
  annotate(
    "text",
    x = as.Date("2020-11-3"), y = 25000,
    label = "U.S. Presidential Election \u2192",
    hjust = 1, vjust = 1, size = 2.5
  ) +
  ggtitle("Closed price of NK225")


# Fit an AR(1) model to the closing price by maximum likelihood.
# aic = FALSE forces the fitted order to be exactly order.max = 1. With the
# default aic = TRUE, ar() chooses the order (here 0 or 1) by AIC, and an
# order-0 fit would leave `ar1$ar` empty so the first report line below would
# print nothing.
ar1 <- ar(dt$Close, aic = FALSE, order.max = 1, method = "mle")
# Report the AR coefficient with its asymptotic standard error, then the
# estimated innovation variance.
sprintf(
  "The estimated beta is %1.4f (s.e. = %1.4f)",
  ar1$ar, sqrt(ar1$asy.var.coef)
)
sprintf("The estimated squared sigma is %1.2f", ar1$var.pred)

# Unit-root check on the closing price: augmented Dickey-Fuller test
# (tseries::adf.test) with lag order k = 1.
library(tseries)
df1 <- adf.test(dt$Close, k = 1)
sprintf(
  "The DF stats is %1.4f (p-value = %1.4f)",
  df1[["statistic"]],
  df1[["p.value"]]
)
# Author's reading of the result: the null hypothesis cannot be rejected.

# Switch to the log-return series: log(Close_t) - log(Close_{t-1}).
# Rows are sorted by date first so that diff() compares consecutive
# observations; the first row has no predecessor and gets NA.
dt <- dt %>%
  arrange(Date) %>%
  mutate(log_return = c(NA, diff(log(Close))))
# Preview the relevant columns, selected by name: positional indices
# (1, 5, 8) silently depend on the column layout of the CSV file.
head(dt[c("Date", "Close", "log_return")])

# Log-return chart with the same shaded band, event lines and event labels as
# the closing-price chart. Rows containing NA (at least the first row, whose
# log return is undefined) are dropped before plotting.
# The fixed decorations are added with annotate() instead of
# geom_rect()/geom_text() with constants inside aes(): constants in aes() are
# recycled for every row of the data, so the band was drawn once per
# observation (its alpha stacks up to an almost opaque fill) and every label
# was printed on top of itself once per observation.
ggplot(na.omit(dt), aes(x = Date, y = log_return)) +
  # shaded band from 2020-04-07 to 2020-05-26, drawn exactly once
  annotate(
    "rect",
    xmin = as.Date("2020-04-07"), xmax = as.Date("2020-05-26"),
    ymin = -Inf, ymax = Inf,
    fill = "grey", alpha = 0.3
  ) +
  geom_line(color = "royalblue") +
  # the y axis shows log returns here, not prices
  labs(x = "Date(Y/M)", y = "log return") +
  # dashed vertical markers for the three labelled events
  geom_vline(
    xintercept = as.Date(c("2019-11-17", "2020-1-3", "2020-11-3")),
    size = 0.4, linetype = "dashed"
  ) +
  annotate(
    "text",
    x = as.Date("2019-11-10"), y = 0.05,
    label = "First case of Covid−19 in China \u2192",
    hjust = 1, vjust = 1, size = 2.5
  ) +
  annotate(
    "text",
    x = as.Date("2020-1-1"), y = 0.04,
    label = "First case of Covid−19 in Japan \u2192",
    hjust = 1, vjust = 1, size = 2.5
  ) +
  # The label below is a multi-line string literal: its line breaks and
  # leading blanks are part of the text, so do not re-indent it.
  annotate(
    "text",
    x = as.Date("2020-4-7"), y = 0.07,
    label = 'First declaration of 
  a state of emergency 
                in Japan',
    hjust = 0.3, vjust = 1, size = 2.5
  ) +
  annotate(
    "text",
    x = as.Date("2020-11-3"), y = 0.05,
    label = "U.S. Presidential Election \u2192",
    hjust = 1, vjust = 1, size = 2.5
  ) +
  ggtitle("Log return of NK225")

# Unit-root check on the log-return series: augmented Dickey-Fuller test with
# lag order k = 1, run on the non-missing returns only.
df2 <- adf.test(na.omit(dt$log_return), k = 1)
sprintf(
  "The DF stats is %1.4f (p-value = %1.4f)",
  df2[["statistic"]],
  df2[["p.value"]]
)
# Author's reading of the result: the null hypothesis is rejected, which
# implies that the log-return series of the Nikkei 225 is stationary.
